# -*- coding: utf-8 -*-
import scrapy
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
import time
from bs4 import BeautifulSoup
from scrapy import Request
import json
from scrapyfundajj.items import fundajjItem

class FundajjSpider(scrapy.Spider):
    """Crawl fund.10jqka.com.cn: list all funds on the data-center page,
    then yield one item per historical net-value record for each fund.

    The detail pages embed the net-value series as JSON assigned to a
    JavaScript variable inside inline ``<script>`` tags.
    """
    name = 'fundajj'
    #allowed_domains = ['http://fund.10jqka.com.cn']
    start_urls = ['http://fund.10jqka.com.cn/datacenter/jz/']

    def parse(self, response):
        """Parse the fund index page and request each fund's history page.

        Generated URLs have the form
        http://fund.10jqka.com.cn/<fund_code>/historynet.html#historynet
        and are handled by :meth:`parse_fund_detail`.
        """
        soup = BeautifulSoup(response.text, 'lxml')
        for table_body in soup.find_all(name='tbody', id='containerMain'):
            for row in table_body.find_all(name='tr', rel='tpl'):
                for link in row.find_all(name='a', field='name'):
                    url = link['href'] + 'historynet.html#historynet'
                    yield Request(url=url, callback=self.parse_fund_detail)

    def parse_fund_detail(self, response):
        """Extract the JSON net-value series from inline scripts.

        Yields one ``fundajjItem`` per historical record, tagged with the
        fund id taken from the URL path
        (http://fund.10jqka.com.cn/<fund_id>/historynet.html...).
        """
        prod_id = response.url.split('/')[-2]
        soup = BeautifulSoup(response.text, 'lxml')
        for script in soup.find_all(name='script', type='text/javascript'):
            text = script.text
            if not text:
                continue
            # Scripts look like "var xxx = [ {...}, ... ];".  Split on the
            # FIRST '=' only, so any '=' inside the JSON payload survives
            # (split('=')[1] would truncate it), and strip only the trailing
            # statement terminator -- replace(';', '') would corrupt ';'
            # characters occurring inside JSON string values.
            _, sep, payload = text.partition('=')
            if not sep:
                continue  # no assignment in this script; not the data blob
            payload = payload.strip().rstrip(';')
            try:
                records = json.loads(payload)
            except ValueError:
                continue  # some other inline script; skip instead of crashing
            for record in records:
                # Fresh item per record: yielding and then mutating a single
                # shared instance would hand identical objects to pipelines.
                item = fundajjItem()
                item['product_id'] = prod_id
                item['date'] = record.get('date')
                item['net'] = record.get('net')
                item['totalnet'] = record.get('totalnet')
                item['fqnet'] = record.get('fqnet')
                item['inc'] = record.get('inc')
                item['rate'] = record.get('rate')
                yield item

